# coding:utf-8

import threading
from Queue import Queue
from ThreadParser import ThreadParser
from ThreadSpider import ThreadSpider

# Flag intended to signal parser threads that crawling has finished.
# NOTE(review): booleans are immutable, so the value handed to ThreadParser
# below is a one-time snapshot — rebinding this module-level name later will
# NOT be observed by the threads. Presumably ThreadParser's exit condition
# actually relies on data_queue.join()/task_done(); confirm in ThreadParser.
isCrawlComplete = False

if __name__ == '__main__':
    # Queue of page numbers to crawl (pages 954..963 inclusive, 10 pages).
    page_queue = Queue(20)
    for page in range(954, 964):
        page_queue.put(page)

    # Queue carrying raw HTML produced by spider threads and consumed by
    # parser threads.
    data_queue = Queue(20)

    # Lock serializing writes to the shared output file across parser threads.
    lock = threading.Lock()

    # Open the output file once (append mode); the try/finally below
    # guarantees it is closed even if one of the join() calls is interrupted.
    output_file = open('duanzi.json', 'a')
    try:
        # Spider threads: pull page numbers from page_queue, push fetched
        # HTML into data_queue.
        for thread_name in ('ThreadSpider 1', 'ThreadSpider 2', 'ThreadSpider 3'):
            spider = ThreadSpider(thread_name, page_queue, data_queue)
            spider.daemon = False
            spider.start()

        # Block until every page number has been fetched and task_done()'d.
        page_queue.join()
        print('data_queue.size=', data_queue.qsize())

        # Parser threads: pull HTML from data_queue, parse it, and append
        # JSON records to the shared output file under `lock`.
        for thread_name in ('ThreadParser 1', 'ThreadParser 2', 'ThreadParser 3'):
            parser = ThreadParser(thread_name, output_file, data_queue, lock,
                                  isCrawlComplete)
            parser.daemon = False
            parser.start()

        # All enqueuing into data_queue completed before this point; wait for
        # the parser threads to drain it.
        data_queue.join()
    finally:
        output_file.close()
